Load packages

Build dataframe - reviewers should not run this!

REVIEWERS DO NOT RUN THIS CHUNK! THIS IS FOR DATA PRODUCT LEADS ONLY!

Read in built dataframe - reviewers start here!

REVIEWERS PLEASE START HERE AND THANK YOU FOR YOUR EYES ON ALL THE DATA! :-)

# THIS LINK SHOULD BE UPDATED WITH THE MOST RECENT PASTA LINK FROM THE EDI STAGING ENVIRONMENT!
current_df <- read_csv('https://raw.githubusercontent.com/melofton/Reservoirs/refs/heads/master/Data/DataAlreadyUploadedToEDI/EDIProductionFiles/MakeEMLFluoroProbe/2024/FluoroProbe_2014_2024.csv') %>%
  mutate(DateTime = force_tz(DateTime, tzone = "America/New_York"))
## Rows: 118502 Columns: 33
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (1): Reservoir
## dbl  (31): Site, CastID, Depth_m, GreenAlgae_ugL, Bluegreens_ugL, BrownAlgae...
## dttm  (1): DateTime
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# double-check time zone
head(current_df$DateTime)
## [1] "2014-05-05 13:08:52 EDT" "2014-05-05 13:08:53 EDT"
## [3] "2014-05-05 13:08:54 EDT" "2014-05-05 13:08:55 EDT"
## [5] "2014-05-05 13:08:57 EDT" "2014-05-05 13:08:58 EDT"
hist(hour(current_df$DateTime))

this_year <- current_df %>%
  filter(year(DateTime) == 2024) 

Check flags

This section checks to make sure each observation has a data flag. It also checks to make sure the frequency of flags match what we expect to see.

#make sure no NAS in the Flag columns
Flags=current_df%>%
  select(DateTime, starts_with("Flag"))

RowsNA=Flags[!complete.cases(Flags), ] # Keep only the complete rows

#check the flag column
Flags=current_df%>%
  select(starts_with("Flag"))

# Make a table with the number of times a flag was used
for(f in 1:(ncol(Flags))){
  #print(colnames(Flags[f]))
  print(table(Flags[,f], useNA = "always"))
}
## Flag_GreenAlgae_ugL
##     0     1  <NA> 
## 99806 18696     0 
## Flag_Bluegreens_ugL
##     0     1  <NA> 
## 99806 18696     0 
## Flag_BrownAlgae_ugL
##     0     1  <NA> 
## 99806 18696     0 
## Flag_MixedAlgae_ugL
##     0     1  <NA> 
## 99806 18696     0 
## Flag_YellowSubstances_ugL
##     0     1  <NA> 
## 99806 18696     0 
## Flag_TotalConc_ugL
##     0     1  <NA> 
## 99806 18696     0 
## Flag_Temp_C
##     0     2  <NA> 
## 75538 42964     0 
## Flag_Transmission_perc
##      0      4   <NA> 
##   8272 110230      0 
## Flag_RFU_525nm
##      0      5   <NA> 
## 118065    437      0 
## Flag_RFU_570nm
##      0      5   <NA> 
## 117695    807      0 
## Flag_RFU_610nm
##      0      5   <NA> 
## 118411     91      0 
## Flag_RFU_370nm
##     0     1     5  <NA> 
## 99595 18696   211     0 
## Flag_RFU_590nm
##      0      5   <NA> 
## 118479     23      0 
## Flag_RFU_470nm
##     0     1     5  <NA> 
## 99438 18696   368     0

Plot all casts individually - reviewers should not run this!

Define heatmap function

flora_heatmap <- function(fp_data, reservoir, year, site, z){
  
  #subset to relevant data
  fp <- fp_data %>%
    filter(Reservoir == reservoir & year(DateTime) == year & Site == site) %>%
    select(CastID, DateTime, Depth_m, {{z}}) 
  
  #slice by depth for each reservoir
  if (reservoir == "FCR"){
    
    if(site == 50){
       depths = seq(0.1, 9.3, by = 0.3)
    } else if(site == 40){
      depths = seq(0.1, 8.5, by = 0.3)
    } else if(site == 30){
      depths = seq(0.1, 7, by = 0.3)
    } else if(site == 20){
      depths = seq(0.1, 4.5, by = 0.3)
    } else if(site == 10){
      depths = seq(0.1, 3.5, by = 0.3)
    }
  
  df.final<-data.frame()
  
  for (i in 1:length(depths)){
    
fp_layer <- fp %>% 
  group_by(CastID) %>% 
  slice(which.min(abs(as.numeric(Depth_m) - depths[i])))

# Bind each of the data layers together.
df.final = bind_rows(df.final, fp_layer)

}


} else if (reservoir == "BVR"){
  
  depths = seq(0.1, 10, by = 0.3)
  df.final<-data.frame()
  
  for (i in 1:length(depths)){
    
    fp_layer<-fp %>% group_by(CastID) %>% slice(which.min(abs(as.numeric(Depth_m) - depths[i])))
    
    # Bind each of the data layers together.
    df.final = bind_rows(df.final, fp_layer)
    
  }
  
} else if(reservoir == "CCR"){
  
  depths = seq(0.1, 20, by = 0.3)
  df.final<-data.frame()
  
  for (i in 1:length(depths)){
    
    fp_layer<-fp %>% group_by(CastID) %>% slice(which.min(abs(as.numeric(Depth_m) - depths[i])))
    
    # Bind each of the data layers together.
    df.final = bind_rows(df.final, fp_layer)
    
  }
  } else if(reservoir == "GWR"){
  
  depths = seq(0.1, 12, by = 0.3)
  df.final<-data.frame()
  
  for (i in 1:length(depths)){
    
    fp_layer<-fp %>% group_by(CastID) %>% slice(which.min(abs(as.numeric(Depth_m) - depths[i])))
    
    # Bind each of the data layers together.
    df.final = bind_rows(df.final, fp_layer)
    
  }
  } else if(reservoir == "SHR"){
  
  depths = seq(0.1, 30, by = 0.3)
  df.final<-data.frame()
  
  for (i in 1:length(depths)){
    
    fp_layer<-fp %>% group_by(CastID) %>% slice(which.min(abs(as.numeric(Depth_m) - depths[i])))
    
    # Bind each of the data layers together.
    df.final = bind_rows(df.final, fp_layer)
    
  } 
  
  }
  
  #wrangle final dataframe for plotting
  # Re-arrange the data frame by date
  fp_new <- arrange(df.final, DateTime)

  # Round each extracted depth to the nearest 10th. 
  fp_new$Depth_m <- round(as.numeric(fp_new$Depth_m), digits = 0.5)
  
  # Convert to DOY
  fp_new$DOY <- yday(fp_new$DateTime)
  
  fig_title <- paste(reservoir, year, "Site", site, z, sep = " ")
  
  interp <- interp(x=fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]),
                      xo = seq(min(fp_new$DOY), max(fp_new$DOY), by = .1), 
                      yo = seq(min(fp_new$Depth_m), max(fp_new$Depth_m), by = 0.01),
                      extrap = T, linear = T, duplicate = "strip")
interp <- interp2xyz(interp, data.frame=T)
  
  p1 <- ggplot(interp, aes(x=x, y=y))+
  geom_raster(aes(fill=z))+
  scale_y_reverse(expand = c(0,0))+
  scale_x_continuous(expand = c(0, 0)) +
  scale_fill_gradientn(colours = blue2green2red(60), na.value="gray")+
  labs(x = "Day of year", y = "Depth (m)", title = fig_title,fill=expression(paste(mu,g/L)))+
  theme_bw()

print(p1)

}

Visualize current year at a glance

flora_heatmap(fp_data = current_df, reservoir = "FCR", year = 2024, site = 50, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter

flora_heatmap(fp_data = current_df, reservoir = "FCR", year = 2024, site = 40, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter

flora_heatmap(fp_data = current_df, reservoir = "FCR", year = 2024, site = 30, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter

flora_heatmap(fp_data = current_df, reservoir = "FCR", year = 2024, site = 20, z = "TotalConc_ugL")

flora_heatmap(fp_data = current_df, reservoir = "FCR", year = 2024, site = 10, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter

flora_heatmap(fp_data = current_df, reservoir = "BVR", year = 2024, site = 50, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter

Visualize a previous year at a glance

flora_heatmap(fp_data = current_df, reservoir = "FCR", year = 2016, site = 50, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter

flora_heatmap(fp_data = current_df, reservoir = "BVR", year = 2016, site = 50, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter

flora_heatmap(fp_data = current_df, reservoir = "CCR", year = 2016, site = 50, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter

flora_heatmap(fp_data = current_df, reservoir = "SHR", year = 2016, site = 50, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter

flora_heatmap(fp_data = current_df, reservoir = "GWR", year = 2016, site = 50, z = "TotalConc_ugL")
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## collinear points, trying to add some jitter to avoid colinearities!
## Warning in interp(x = fp_new$DOY, y = fp_new$Depth_m, z = unlist(fp_new[z]), :
## success: collinearities reduced through jitter

Code to check for negative RFUs

current_df |>
  select(c('DateTime','Depth_m', 'Reservoir', 'Site', starts_with('RFU'))) |>
  pivot_longer(RFU_370nm:RFU_610nm, values_to = 'RFU', names_to = 'wavelength') |> 
  filter(RFU < 0) |> 
  ggplot(aes(x=DateTime, y= Depth_m, colour = as_factor(wavelength))) + 
  facet_wrap(Reservoir~Site) + 
  geom_point()

current_df |>
  select(c('DateTime','Depth_m', 'Reservoir', 'Site', starts_with('RFU'))) |>
  pivot_longer(RFU_370nm:RFU_610nm, values_to = 'RFU', names_to = 'wavelength') |> 
  filter(RFU < 0) |> 
  reframe(.by = wavelength, 
          n = n())
## # A tibble: 6 × 2
##   wavelength     n
##   <chr>      <int>
## 1 RFU_570nm    807
## 2 RFU_610nm     91
## 3 RFU_525nm    437
## 4 RFU_590nm     23
## 5 RFU_370nm    218
## 6 RFU_470nm    375

Check to make sure that what is in the maintenance log was actually removed

Look at the last rows of the maintenance log

We want to make sure that our maintenance log actually worked and took out the values or changes those it was supposed to

## Rows: 28 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): Reservoir, DataStream, TIMESTAMP_start, TIMESTAMP_end, start_parame...
## dbl (2): Site, flag
## lgl (2): Depth, update_value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 6 × 11
##   Reservoir  Site Depth DataStream TIMESTAMP_start TIMESTAMP_end start_parameter
##   <chr>     <dbl> <lgl> <chr>      <chr>           <chr>         <chr>          
## 1 FCR          20 NA    Flora      2014-05-01 00:… 2023-12-31 2… Transmission_p…
## 2 FCR          10 NA    Flora      2014-05-01 00:… 2023-12-31 2… Transmission_p…
## 3 BVR          50 NA    Flora      2014-05-01 00:… 2023-12-31 1… Transmission_p…
## 4 CCR          50 NA    Flora      2014-05-01 00:… 2023-12-31 2… Transmission_p…
## 5 SHR          50 NA    Flora      2014-05-01 00:… 2023-12-31 2… Transmission_p…
## 6 GWR          50 NA    Flora      2014-05-01 00:… 2023-12-31 2… Transmission_p…
## # ℹ 4 more variables: end_parameter <chr>, flag <dbl>, update_value <lgl>,
## #   notes <chr>
Reservoir Site Depth DataStream TIMESTAMP_start TIMESTAMP_end start_parameter end_parameter flag update_value notes
FCR 20 NA Flora 2014-05-01 00:00:00 EDT 2023-12-31 23:59:59 EDT Transmission_perc Transmission_perc 4 NA bad transmission sensor calibration
FCR 10 NA Flora 2014-05-01 00:00:00 EDT 2023-12-31 23:59:59 EDT Transmission_perc Transmission_perc 4 NA bad transmission sensor calibration
BVR 50 NA Flora 2014-05-01 00:00:00 EDT 2023-12-31 11:59:59 EDT Transmission_perc Transmission_perc 4 NA bad transmission sensor calibration
CCR 50 NA Flora 2014-05-01 00:00:00 EDT 2023-12-31 23:59:59 EDT Transmission_perc Transmission_perc 4 NA bad transmission sensor calibration
SHR 50 NA Flora 2014-05-01 00:00:00 EDT 2023-12-31 23:59:59 EDT Transmission_perc Transmission_perc 4 NA bad transmission sensor calibration
GWR 50 NA Flora 2014-05-01 00:00:00 EDT 2023-12-31 23:59:59 EDT Transmission_perc Transmission_perc 4 NA bad transmission sensor calibration

Check the that the columns have flags

Look at the first few rows of the data frame and check that the observations after the TIMESTAMP_start are flagged

Look at the first 5 rows for that time

## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(colname_start)
## 
##   # Now:
##   data %>% select(all_of(colname_start))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(colname_end)
## 
##   # Now:
##   data %>% select(all_of(colname_end))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Reservoir DateTime Transmission_perc Flag_Transmission_perc
FCR 2014-05-05 13:08:52 78.00 4
FCR 2014-05-05 13:08:53 95.32 4
FCR 2014-05-05 13:08:54 97.31 4
FCR 2014-05-05 13:08:55 94.15 4
FCR 2014-05-05 13:08:57 95.98 4
FCR 2014-05-05 13:08:58 95.74 4

Look at the last 6 rows for the maintenance time

Make sure the observations are flagged

Reservoir DateTime Transmission_perc Flag_Transmission_perc
FCR 2023-11-14 14:07:50 100 4
FCR 2023-11-14 14:07:52 100 4
FCR 2023-11-14 14:07:55 100 4
FCR 2023-11-14 14:07:57 100 4
FCR 2023-11-14 14:08:00 100 4
FCR 2023-11-14 14:08:02 100 4

Make site description file

 # These lines of code make the csv of the site descriptions with lat and long
 # MEL You don't need to run this if you already have the file I believe?

  # # Use Gsheet because you don't need to authenticate it. 
  # sites <- gsheet::gsheet2tbl("https://docs.google.com/spreadsheets/d/1TlQRdjmi_lzwFfQ6Ovv1CAozmCEkHumDmbg_L4A2e-8/edit#gid=1244423834")
  # #data<- read_csv("YOUR DATA.csv")# Use this if you read in a csv
  # data <- current_df #This is the line you need to modify!
  # trim_sites = function(data,sites){
  #   data_res_site=data%>% #Create a Reservoir/Site combo column
  #     mutate(res_site = trimws(paste0(Reservoir,Site)))
  #   sites_merged = sites%>% #Filter to Sites that are in the dataframe
  #     mutate(res_site = trimws(paste0(Reservoir,Site)))%>%
  #     filter(res_site%in%data_res_site$res_site)%>%
  #     select(-res_site)
  # }
  # sites_trimmed = trim_sites(data,sites) 
  # write.csv(sites_trimmed,"site_descriptions.csv", row.names=F)# Write to file

Download the QAQC function

We will put it in the folder where the EDI production files are to make sure that it happens and to get the most recent version

download.file("https://raw.githubusercontent.com/melofton/Reservoirs/master/Scripts/L1_functions/fluoroprobe_create.R", "FluoroProbe_qaqc_2014_2024.R")